# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from comic.items import ComicItem
import os
import time


class IkkdmSpider(scrapy.Spider):
    """Spider that walks a comic's chapter list and yields one item per page.

    Flow:
      1. ``parse`` reads the chapter list page and requests every chapter.
      2. ``parse_chapter_url`` handles the first page of a chapter: it yields
         the item for that page and requests the remaining pages.
      3. ``parse_jpg_url`` handles every subsequent page of the chapter.

    Each yielded item carries ``chapter_name``, ``chapter_url`` (the URL of
    the page the image was found on) and ``jpg_url`` (the image address).
    """

    name = 'ikkdm'
    allowed_domains = ['comic2.ikkdm.com', 'n2.1whour.com']
    start_urls = ['http://comic2.ikkdm.com/comiclist/146']

    # Host that replaces the placeholder tokens found in the page script.
    _IMAGE_HOST = 'http://n5.1whour.com/'
    # Tokens in the page script that are substituted with ``_IMAGE_HOST``;
    # they are applied in this order.
    _HOST_PLACEHOLDERS = ('server', 'm201304d', 'm200911d')
    # Quote and concatenation characters stripped from the matched snippet.
    _JS_NOISE = ('\'', '+', '\"')
    # Captures everything after ``src=`` up to and including the extension.
    _IMAGE_SRC_RE = r'(?:src|SRC)=(.*?(?:jpg|JPG|png|PNG))'

    def parse(self, response):
        """Yield one request per chapter link found on the chapter list page.

        A fresh ``ComicItem`` holding the chapter name and URL is attached to
        each request through ``meta['item']``.
        """
        for chapter in response.xpath('//dl[@id="comiclistn"]/dd/a[1]'):
            href = chapter.xpath('@href').extract_first()
            if not href:
                # An anchor without href cannot be followed; skip it rather
                # than fail the whole listing.
                continue
            item = ComicItem()
            item['chapter_name'] = chapter.xpath('text()').extract_first()
            # urljoin resolves the link against the page URL, so it works
            # whether or not the listing URL ends with a slash and does not
            # depend on a hard-coded comic id.
            item['chapter_url'] = response.urljoin(href)
            yield Request(url=item['chapter_url'], meta={'item': item},
                          callback=self.parse_chapter_url)

    def parse_chapter_url(self, response):
        """Handle the first page of a chapter.

        Yields the item for the first page (when an image URL is found) and
        then one request per remaining page, each carrying its own copy of
        the item.
        """
        item = response.meta['item']
        item['chapter_url'] = response.url

        jpg_url = self._extract_jpg_url(response)
        if jpg_url is not None:
            item['jpg_url'] = jpg_url
            yield item

        pages_text = response.xpath('//table[2]/tr/td/text()').re_first('共(.*?)页')
        try:
            pages = int(pages_text)
        except (TypeError, ValueError):
            # TypeError: no match (None); ValueError: non-numeric capture.
            self.logger.warning('Could not read page count on %s', response.url)
            return

        # Sibling pages live next to the current one as "<n>.htm".
        page_base = response.url.rsplit('/', 1)[0] + '/'
        for page in range(2, pages + 1):
            # Every request gets an independent copy: the callback mutates the
            # item it receives, and sharing one object across requests would
            # let later pages overwrite the fields of items already yielded.
            yield Request(url=page_base + str(page) + '.htm',
                          meta={'item': item.copy()},
                          callback=self.parse_jpg_url)

    def parse_jpg_url(self, response):
        """Handle a follow-up page of a chapter and yield its item."""
        item = response.meta['item']
        jpg_url = self._extract_jpg_url(response)
        if jpg_url is None:
            return
        item['chapter_url'] = response.url
        item['jpg_url'] = jpg_url
        yield item

    def _extract_jpg_url(self, response):
        """Return the normalised image URL found in the page script, or None.

        The raw match is taken from the first ``<script>`` in the second
        table, stripped of quote/plus characters, and its host placeholder
        tokens are replaced with ``_IMAGE_HOST``. A warning is logged when no
        image source is present.
        """
        raw = response.xpath('//table[2]/tr/td/script[1]').re_first(self._IMAGE_SRC_RE)
        if raw is None:
            self.logger.warning('No image source found on %s', response.url)
            return None
        for char in self._JS_NOISE:
            raw = raw.replace(char, '')
        for placeholder in self._HOST_PLACEHOLDERS:
            raw = raw.replace(placeholder, self._IMAGE_HOST)
        return raw




